#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Validate that preemption by qos is enforced
############################################################################
# Copyright (C) 2011-2014 SchedMD LLC
# Written by Nathan Yee <nyee32@schedmd.com>
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
source ./globals
source ./globals_accounting

# Accounting entities and job bookkeeping used throughout the test.
# "user" is filled in later with the name of the user running the test.
set user        ""
set acct_1      "${test_name}_acct1"
set acct_2      "${test_name}_acct2"
set qos_1       "${test_name}_qos1"
set qos_2       "${test_name}_qos2"
set file_in     "$test_dir/job_script"
# Number of nodes reported by get_nodes_by_state; every job asks for all of them
set nodes       [llength [get_nodes_by_state]]
set job_id      0
# Job ids of the jobs submitted under qos_1 (preemptee) and qos_2 (preemptor)
set qos_1_id    0
set qos_2_id    0

# job states
set done_state      "DONE"
set pending_state   "PENDING"
set preempted_state "PREEMPTED"
set running_state   "RUNNING"

# scontrol update directive used to make a job eligible to start immediately
set eligible_now_job_directive "EligibleTime=now"

#
# Prerequisites: QOS based preemption, slurmdbd accounting, at least two
# nodes, and a MinJobAge of at least 10.
#
if {[get_config_param "PreemptType"] ne "preempt/qos"} {
	skip "This test requires that PreemptType=preempt/qos"
}
if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} {
	skip "This test requires use of Slurmdbd"
}
if {$nodes < 2} {
	skip "Not enough available nodes ($nodes < 2)"
}

# Only the first number found in the MinJobAge value is used. Guard the match
# so that an unexpected value produces a clear failure instead of a Tcl
# "can't read min_job_age" error on the comparison below.
# NOTE(review): the minimum of 10 presumably keeps finished job records
# around long enough for the state checks -- confirm.
if {![regexp "($number)" [get_config_param "MinJobAge"] {} min_job_age]} {
	fail "Unable to determine MinJobAge from the configuration"
}
if {$min_job_age < 10} {
	skip "MinJobAge configured too low for this test ($min_job_age < 10)"
}

#
# Create a QOS, an account bound to that QOS, and an association of the
# global $user with that account.
#
# acct_name - name of the account to create
# qos_name  - name of the QOS to create
# pre_qos   - QOS that qos_name is allowed to preempt (may be empty)
# pre_mode  - preempt mode to set on qos_name
#
# The QOS is created with maxnodes set to the global $nodes. Fails the test
# if sacctmgr stops responding or if the three additions are not all
# confirmed in the sacctmgr output.
#
proc acct_setup { acct_name qos_name pre_qos pre_mode } {
	global user nodes sacctmgr

	# Each step pairs the sacctmgr arguments with the output text that
	# confirms the record was added. Steps run in order: QOS, account, user.
	set steps [list \
		[list create qos $qos_name preempt=$pre_qos preemptmode=$pre_mode maxnodes=$nodes] "Adding QOS" \
		[list create account $acct_name qos=$qos_name] "Adding Account" \
		[list add user $user account=$acct_name] "Associations" \
	]

	set confirmed 0
	foreach {sacctmgr_args confirmation} $steps {
		spawn $sacctmgr -i {*}$sacctmgr_args
		expect {
			-re $confirmation {
				incr confirmed
				exp_continue
			}
			timeout {
				fail "sacctmgr is not responding"
			}
			eof {
				wait
			}
		}
	}

	if {$confirmed != 3} {
		fail "Account was not created properly"
	}
}

#
# Submit the test job script as an exclusive batch job under an account.
#
# acct1   - account to charge the job to
# het_job - when true, submit a heterogeneous job made of a 1-node component
#           and a component with the remaining ($nodes - 1) nodes; otherwise
#           submit a single job across all $nodes nodes
#
# Returns the job id reported by sbatch. Fails the test if sbatch stops
# responding or reports no job id.
#
proc sub_job { acct1 {het_job false} } {
	global nodes file_in sbatch number

	# Node/account layout: two components separated by ":" for a het job
	if {$het_job} {
		set layout [list -N1 -A$acct1 : -N[expr {$nodes - 1}] -A$acct1]
	} else {
		set layout [list -N$nodes -A$acct1]
	}

	set submitted_id 0
	spawn $sbatch -o/dev/null --exclusive {*}$layout $file_in
	expect {
		-re "Submitted batch job ($number)" {
			set submitted_id $expect_out(1,string)
			exp_continue
		}
		timeout {
			fail "sbatch is not responding"
		}
		eof {
			wait
		}
	}

	if {$submitted_id != 0} {
		return $submitted_id
	}
	fail "sbatch did not submit job"
}

#
# Remove the accounts and QOS records used by this test.
#
# Calls wait_for_account_done on both accounts first, then for each
# account/QOS pair removes the global $user from the account, deletes the
# account and deletes the QOS. "Nothing deleted" is accepted as success so
# that cleanup also works when the records do not exist. Logs a warning if
# the four deletions are not all acknowledged.
#
proc cleanup {} {
	global user acct_1 acct_2 qos_1 qos_2 sacctmgr

	wait_for_account_done $acct_1,$acct_2

	set acknowledged 0
	foreach acct [list $acct_1 $acct_2] qos [list $qos_1 $qos_2] {
		run_command "$sacctmgr -i remove user $user where account=$acct"

		spawn $sacctmgr delete -i account $acct
		expect {
			-re "(Deleting accounts|Nothing deleted)" {
				incr acknowledged
				exp_continue
			}
			timeout {
				fail "sacctmgr is not responding"
			}
			eof {
				wait
			}
		}

		spawn $sacctmgr delete -i qos $qos
		expect {
			-re "(Deleting QOS|Nothing deleted)" {
				incr acknowledged
				exp_continue
			}
			timeout {
				fail "sacctmgr is not responding"
			}
			eof {
				wait
			}
		}
	}

	if {$acknowledged != 4} {
		log_warn "Unable to clean up accounts and qos"
	}
}

# Script run by every job submitted in this test
make_bash_script $file_in "sleep 30"

# The user name must be known before cleanup runs, because cleanup removes
# this user from the test accounts.
set user [get_my_user_name]

# Preempt modes to test
set preempt_mode_cancel  "cancel"
set preempt_mode_requeue "requeue"
set preempt_mode_list "$preempt_mode_cancel $preempt_mode_requeue"

# Job types to test for each mode
set job_type_batch "batch"
set job_type_het   "het"
set job_type_list "$job_type_batch $job_type_het"

log_info "**** Cleanup from previous run ****"
cleanup

# qos_1 preempts nothing; qos_2 is allowed to preempt qos_1
acct_setup $acct_1 $qos_1 "" $preempt_mode_cancel
acct_setup $acct_2 $qos_2 $qos_1 cluster

foreach preempt_mode $preempt_mode_list {
	foreach job_type $job_type_list {
		set mode_type_string [string toupper "$preempt_mode $job_type"]
		log_info "*** TEST PREEMPT $mode_type_string JOB ***"

		# Switch the preemptee QOS to the preempt mode under test
		set mod_qos_vals(preemptmode) $preempt_mode
		mod_qos $qos_1 [array get mod_qos_vals]

		if { $job_type == $job_type_batch } {
			set have_het_job 0
		} else {
			set have_het_job 1
		}

		# Start the job that will be preempted; it occupies all nodes
		set qos_1_id [sub_job $acct_1 $have_het_job]
		wait_for_job -fail $qos_1_id $running_state $have_het_job

		# Start the preempting job; it can only run by preempting the first
		set qos_2_id [sub_job $acct_2]
		wait_for_job -fail $qos_2_id $running_state

		if { $preempt_mode == $preempt_mode_cancel } {
			# Cancel mode: the preempted job must end up PREEMPTED
			set jobs_ok false
			wait_for {$jobs_ok} {
				set jobs_ok [check_job_state $qos_1_id $preempted_state $have_het_job]
			}
			subtest {$jobs_ok} "Job ($qos_1_id) should be fully in the $preempted_state state"
		} else {
			# Requeue mode: the preempted job must go back to PENDING
			set jobs_ok false
			wait_for {$jobs_ok} {
				set jobs_ok [check_job_state $qos_1_id $pending_state $have_het_job]
			}
			subtest {$jobs_ok} "Job ($qos_1_id) should be fully in the $pending_state state"

			# Wait for the preempting job to finish.
			# Make job eligible to run now to avoid delay
			run_command "$scontrol update job $qos_2_id $eligible_now_job_directive"
			wait_for_job -fail $qos_2_id $done_state

			# Wait for requeued job to restart.
			# Make job eligible to run now to avoid delay
			run_command "$scontrol update job $qos_1_id $eligible_now_job_directive"
			wait_for_job -fail $qos_1_id $running_state $have_het_job

			set jobs_ok false
			wait_for {$jobs_ok} {
				set jobs_ok [check_job_state $qos_1_id $running_state $have_het_job]
			}
			subtest {$jobs_ok} "Job ($qos_1_id) should be fully in the $running_state state"
		}

		# Let the jobs of both accounts finish before the next combination
		wait_for_account_done $acct_1,$acct_2
	}
}
